This is a Rmd file analyzing our raw count data by the glmmSeq package as described in the vignette and manual.
sessionInfo() #provides list of loaded packages and version of R. I still have version 4.1 for now.
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.33 R6_2.5.1 fastmap_1.1.1 xfun_0.39
## [5] cachem_1.0.8 knitr_1.42 htmltools_0.5.5 rmarkdown_2.21
## [9] cli_3.6.1 sass_0.4.6 jquerylib_0.1.4 compiler_4.3.0
## [13] rstudioapi_0.15.0 tools_4.3.0 evaluate_0.21 bslib_0.4.2
## [17] yaml_2.3.7 rlang_1.1.1 jsonlite_1.8.7
if ("rtracklayer" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rtracklayer")
if ("goseq" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install('goseq')
if ("rrvgo" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rrvgo")
if ("GO.db" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("GO.db")
#BiocManager::install("org.Ce.eg.db", force=TRUE) #install if needed
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## Warning: replacing previous import 'S4Arrays::read_block' by
## 'DelayedArray::read_block' when loading 'SummarizedExperiment'
##
library(rtracklayer)
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
library(rrvgo)
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
library(forcats)
sessionInfo() #list of packages after library-ing these packages
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] forcats_1.0.0 tidyr_1.3.0 GO.db_3.17.0
## [4] AnnotationDbi_1.62.1 Biobase_2.60.0 rrvgo_1.12.0
## [7] rtracklayer_1.60.0 GenomicRanges_1.52.0 GenomeInfoDb_1.36.0
## [10] IRanges_2.34.0 S4Vectors_0.38.1 BiocGenerics_0.46.0
## [13] goseq_1.52.0 geneLenDataBase_1.36.0 BiasedUrn_2.0.9
## [16] ggplot2_3.4.2 dplyr_1.1.2
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.15.0
## [3] jsonlite_1.8.7 umap_0.2.10.0
## [5] magrittr_2.0.3 GenomicFeatures_1.52.0
## [7] rmarkdown_2.21 BiocIO_1.10.0
## [9] zlibbioc_1.46.0 vctrs_0.6.3
## [11] memoise_2.0.1 Rsamtools_2.15.2
## [13] RCurl_1.98-1.12 askpass_1.1
## [15] htmltools_0.5.5 S4Arrays_1.0.4
## [17] progress_1.2.2 curl_5.0.0
## [19] sass_0.4.6 bslib_0.4.2
## [21] cachem_1.0.8 GenomicAlignments_1.36.0
## [23] igraph_1.4.2 mime_0.12
## [25] lifecycle_1.0.3 pkgconfig_2.0.3
## [27] Matrix_1.6-0 R6_2.5.1
## [29] fastmap_1.1.1 GenomeInfoDbData_1.2.10
## [31] MatrixGenerics_1.12.0 shiny_1.7.4
## [33] digest_0.6.33 colorspace_2.1-0
## [35] RSpectra_0.16-1 RSQLite_2.3.1
## [37] filelock_1.0.2 fansi_1.0.4
## [39] httr_1.4.6 mgcv_1.8-42
## [41] compiler_4.3.0 bit64_4.0.5
## [43] withr_2.5.0 BiocParallel_1.34.1
## [45] DBI_1.1.3 biomaRt_2.56.0
## [47] openssl_2.0.6 rappdirs_0.3.3
## [49] DelayedArray_0.26.2 rjson_0.2.21
## [51] tools_4.3.0 httpuv_1.6.11
## [53] glue_1.6.2 restfulr_0.0.15
## [55] nlme_3.1-162 GOSemSim_2.26.0
## [57] promises_1.2.0.1 grid_4.3.0
## [59] gridBase_0.4-7 generics_0.1.3
## [61] gtable_0.3.3 data.table_1.14.8
## [63] hms_1.1.3 xml2_1.3.4
## [65] utf8_1.2.3 XVector_0.40.0
## [67] ggrepel_0.9.3 pillar_1.9.0
## [69] stringr_1.5.0 later_1.3.1
## [71] splines_4.3.0 BiocFileCache_2.8.0
## [73] lattice_0.21-8 bit_4.0.5
## [75] tidyselect_1.2.0 tm_0.7-11
## [77] Biostrings_2.68.1 knitr_1.42
## [79] NLP_0.2-1 SummarizedExperiment_1.30.1
## [81] xfun_0.39 matrixStats_0.63.0
## [83] pheatmap_1.0.12 stringi_1.7.12
## [85] yaml_2.3.7 evaluate_0.21
## [87] codetools_0.2-19 wordcloud_2.6
## [89] tibble_3.2.1 cli_3.6.1
## [91] xtable_1.8-4 reticulate_1.28
## [93] munsell_0.5.0 jquerylib_0.1.4
## [95] treemap_2.4-3 Rcpp_1.0.11
## [97] dbplyr_2.3.2 png_0.1-8
## [99] XML_3.99-0.14 parallel_4.3.0
## [101] ellipsis_0.3.2 blob_1.2.4
## [103] prettyunits_1.1.1 bitops_1.0-7
## [105] slam_0.1-50 scales_1.2.1
## [107] purrr_1.0.1 crayon_1.5.2
## [109] rlang_1.1.1 KEGGREST_1.40.0
DEGs <- read.csv(file="../../../output/Slope_Base/signif_genes_normcts.csv", sep=',', header=TRUE) %>% dplyr::select(!c('X'))
#NOTE! This is not a file only with differentially expressed genes, this contains all of the genes in our dataset but also contains p-value information and fold change information to help determine which genes are signficant DEGs based on our model in glmmSeq
rownames(DEGs) <- DEGs$Gene
dim(DEGs)
## [1] 9011 75
Origin_DEGs <- DEGs %>% dplyr::filter(Origin < 0.05)
nrow(Origin_DEGs)
## [1] 840
genes <- rownames(DEGs)
Based off of Ariana’s script
Functional annotation file obtained from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.EggNog_results.txt.gz on April 3, 2023.
Pacuta.annot <- read.delim("../../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt") %>% dplyr::rename("query" = X.query)
dim(Pacuta.annot)
## [1] 16784 21
head(Pacuta.annot)
## query seed_ortholog evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120
## 2 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123
## 3 Pocillopora_acuta_HIv2___RNAseq.g7985.t1 192875.XP_004345759.1 1.70e-183
## 4 Pocillopora_acuta_HIv2___RNAseq.g13511.t1 45351.EDO28722 2.94e-48
## 5 Pocillopora_acuta_HIv2___TS.g15308.t1 10224.XP_006813307.1 3.19e-20
## 6 Pocillopora_acuta_HIv2___RNAseq.g2057.t1 106582.XP_004573970.1 3.70e-14
## score
## 1 364.0
## 2 355.0
## 3 526.0
## 4 172.0
## 5 92.4
## 6 68.2
## eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
## 3 COG3239@1|root,KOG4232@2759|Eukaryota,39UMW@33154|Opisthokonta
## 4 2ED36@1|root,2SITK@2759|Eukaryota,39IIK@33154|Opisthokonta,3C3PY@33208|Metazoa
## 5 COG2801@1|root,KOG0017@2759|Eukaryota,38F42@33154|Opisthokonta,3BA5H@33208|Metazoa
## 6 2CSTD@1|root,2S4A7@2759|Eukaryota,3A79X@33154|Opisthokonta,3BT5E@33208|Metazoa,3D9B1@33213|Bilateria,48FVM@7711|Chordata,49CYZ@7742|Vertebrata,4A886@7898|Actinopterygii
## max_annot_lvl COG_category
## 1 33208|Metazoa E
## 2 33208|Metazoa O
## 3 33154|Opisthokonta I
## 4 33208|Metazoa -
## 5 33208|Metazoa OU
## 6 33208|Metazoa S
## Description Preferred_name
## 1 Cobalamin-independent synthase, Catalytic domain -
## 2 negative regulation of male germ cell proliferation PRDX4
## 3 Fatty acid desaturase -
## 4 - -
## 5 K02A2.6-like -
## 6 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y PIGY
## GOs
## 1 -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
## 3 -
## 4 -
## 5 -
## 6 GO:0000506,GO:0005575,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005789,GO:0005886,GO:0006464,GO:0006497,GO:0006505,GO:0006506,GO:0006629,GO:0006643,GO:0006644,GO:0006650,GO:0006661,GO:0006664,GO:0006793,GO:0006796,GO:0006807,GO:0008150,GO:0008152,GO:0008610,GO:0008654,GO:0009058,GO:0009059,GO:0009247,GO:0009987,GO:0012505,GO:0016020,GO:0016254,GO:0019538,GO:0019637,GO:0031984,GO:0032991,GO:0034645,GO:0036211,GO:0042157,GO:0042158,GO:0042175,GO:0043170,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043412,GO:0044237,GO:0044238,GO:0044249,GO:0044255,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044425,GO:0044432,GO:0044444,GO:0044446,GO:0044464,GO:0045017,GO:0046467,GO:0046474,GO:0046486,GO:0046488,GO:0071704,GO:0071944,GO:0090407,GO:0098796,GO:0098827,GO:1901135,GO:1901137,GO:1901564,GO:1901566,GO:1901576,GO:1902494,GO:1903509,GO:1990234
## EC KEGG_ko
## 1 2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
## 3 1.14.19.31 ko:K12418
## 4 - -
## 5 - -
## 6 - ko:K11001
## KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2 ko04214,map04214
## 3 -
## 4 -
## 5 -
## 6 ko00563,ko01100,map00563,map01100
## KEGG_Module KEGG_Reaction KEGG_rclass
## 1 M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2 - - -
## 3 - - -
## 4 - - -
## 5 - - -
## 6 - R05916 -
## BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000 - - -
## 2 ko00000,ko00001,ko01000,ko04147 - - -
## 3 ko00000,ko01000,ko01004 - - -
## 4 - - - -
## 5 - - - -
## 6 ko00000,ko00001 - - -
## PFAMs
## 1 Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
## 3 Cyt-b5,FA_desaturase
## 4 -
## 5 RVT_1,rve
## 6 PIG-Y
genes2annot = match(genes, Pacuta.annot$query) #match genes in DEGs (all genes after filtering) to genes in annotation file
sum(is.na(genes2annot)) #number of genes without EggNog annotation
## [1] 2812
missing<-as.data.frame(genes[which(is.na(genes2annot))]) #dataframe of genes without EggNog annotation
head(missing)
## genes[which(is.na(genes2annot))]
## 1 Pocillopora_acuta_HIv2___RNAseq.g7479.t3a
## 2 Pocillopora_acuta_HIv2___RNAseq.g7479.t3b
## 3 Pocillopora_acuta_HIv2___RNAseq.g682.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g4805.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g11061.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g16217.t1
2812/9011 genes without eggnog annotation
names(Pacuta.annot)
## [1] "query" "seed_ortholog" "evalue" "score"
## [5] "eggNOG_OGs" "max_annot_lvl" "COG_category" "Description"
## [9] "Preferred_name" "GOs" "EC" "KEGG_ko"
## [13] "KEGG_Pathway" "KEGG_Module" "KEGG_Reaction" "KEGG_rclass"
## [17] "BRITE" "KEGG_TC" "CAZy" "BiGG_Reaction"
## [21] "PFAMs"
geneInfo0 = data.frame(gene_id = genes, #add gene id
Accession = Pacuta.annot$seed_ortholog[genes2annot], #add accession number
Bitscore = Pacuta.annot$score[genes2annot], #add bitscore
eValue = Pacuta.annot$evalue[genes2annot], #add e value
Description = Pacuta.annot$Description[genes2annot], #add description of gene
Annotation.GO.ID = Pacuta.annot$GOs[genes2annot], #add GO ID's
q_Origin = DEGs$Origin, #add Origin adjusted p-value
q_Treatment = DEGs$Treatment, #add Treatment adjusted p-value
q_Interaction = DEGs$Treatment.Origin, #add Treatment:Origin adjusted p-value
Stable_OriginFC = DEGs$Stable_OriginFC, #add fold change for Slope vs Flat in the stable treatment
Variable_OriginFC = DEGs$Variable_OriginFC, #add fold change for Slope vs Flat in the variable treatment
maxGroupFC = DEGs$maxGroupFC, #add max group fold change (was FC bigger in stable of variable treatment)
col = DEGs$col) #add qualitative significance info
dim(geneInfo0)
## [1] 9011 13
head(geneInfo0,2)
## gene_id Accession Bitscore eValue
## 1 Pocillopora_acuta_HIv2___RNAseq.g27841.t1 45351.EDO35402 566 4.00e-197
## 2 Pocillopora_acuta_HIv2___RNAseq.g14011.t1 45351.EDO48592 449 1.17e-151
## Description
## 1 cyclin-dependent protein serine/threonine kinase activity
## 2 TAF6-like RNA polymerase II, p300 CBP-associated
## Annotation.GO.ID
## 1 GO:0000003,GO:0003006,GO:0003674,GO:0003824,GO:0004672,GO:0004674,GO:0004693,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005737,GO:0005813,GO:0005815,GO:0005856,GO:0006464,GO:0006468,GO:0006793,GO:0006796,GO:0006807,GO:0007154,GO:0007165,GO:0007548,GO:0008150,GO:0008152,GO:0009987,GO:0015630,GO:0016301,GO:0016310,GO:0016740,GO:0016772,GO:0016773,GO:0019538,GO:0022414,GO:0023052,GO:0032502,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043412,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044430,GO:0044446,GO:0044464,GO:0050789,GO:0050794,GO:0050896,GO:0051716,GO:0051726,GO:0065007,GO:0071704,GO:0097472,GO:0140096,GO:1901564
## 2 GO:0000118,GO:0000123,GO:0003674,GO:0003676,GO:0003677,GO:0003712,GO:0003713,GO:0005488,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0006325,GO:0006338,GO:0006355,GO:0006357,GO:0006464,GO:0006473,GO:0006475,GO:0006807,GO:0006996,GO:0008150,GO:0008152,GO:0009889,GO:0009891,GO:0009893,GO:0009987,GO:0010468,GO:0010556,GO:0010557,GO:0010604,GO:0016043,GO:0016569,GO:0016570,GO:0016573,GO:0016604,GO:0016607,GO:0018193,GO:0018205,GO:0018393,GO:0018394,GO:0019219,GO:0019222,GO:0019538,GO:0030914,GO:0031248,GO:0031323,GO:0031325,GO:0031326,GO:0031328,GO:0031974,GO:0031981,GO:0032991,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043412,GO:0043543,GO:0043966,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044428,GO:0044446,GO:0044451,GO:0044464,GO:0045935,GO:0048518,GO:0048522,GO:0050789,GO:0050794,GO:0051171,GO:0051173,GO:0051252,GO:0051254,GO:0051276,GO:0060255,GO:0065007,GO:0070013,GO:0070461,GO:0071704,GO:0071840,GO:0080090,GO:0097159,GO:0140110,GO:1901363,GO:1901564,GO:1902493,GO:1902494,GO:1902680,GO:1903506,GO:1903508,GO:1990234,GO:2000112,GO:2001141
## q_Origin q_Treatment q_Interaction Stable_OriginFC Variable_OriginFC
## 1 0.3325688 1 0.9994279 -0.2177817 -0.1121968
## 2 0.1514413 1 0.9994279 0.6976765 0.2428565
## maxGroupFC col
## 1 Stable Not Significant
## 2 Stable Not Significant
Add KEGG annotation information. Downloaded from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.KEGG_results.txt.gz on April 3, 2023.
kegg<-read.table("../../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt", sep="", quote="", na.strings=c("","NA"), blank.lines.skip = FALSE, header=FALSE)
dim(kegg)
## [1] 33730 2
head(kegg)
## V1 V2
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a <NA>
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b K22584
## 3 Pocillopora_acuta_HIv2___RNAseq.g22918.t1 <NA>
## 4 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 K03386
## 5 Pocillopora_acuta_HIv2___RNAseq.g7985.t1 <NA>
## 6 Pocillopora_acuta_HIv2___RNAseq.g13511.t1 <NA>
Add KEGG annotations to each gene.
geneInfo0$KEGG <- kegg$V2[match(geneInfo0$gene_id, kegg$V1)]
sum(is.na(geneInfo0$KEGG)) #number of genes without KEGG annotation
## [1] 3828
missing_KEGG <- as.data.frame(genes[which(is.na(geneInfo0$KEGG))]) #dataframe of genes without EggNog annotation
head(missing_KEGG)
## genes[which(is.na(geneInfo0$KEGG))]
## 1 Pocillopora_acuta_HIv2___RNAseq.g7479.t3a
## 2 Pocillopora_acuta_HIv2___RNAseq.g7479.t3b
## 3 Pocillopora_acuta_HIv2___RNAseq.g682.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g4805.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g11061.t1
## 6 Pocillopora_acuta_HIv2___TS.g24370.t1
Order geneInfo0 by significance of Origin, q_Origin (adjusted p value)
geneInfo <- geneInfo0[order(geneInfo0[, 'q_Origin']), ]
write.csv(geneInfo, file = "../../../output/Slope_Base/geneInfo.csv") #gene info for reference/supplement
#geneInfo<-read.csv("../../../output/Slope_Base/geneInfo.csv")
dim(geneInfo)
## [1] 9011 14
Get gene length information.
#import file
gff <- rtracklayer::import("../../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3") #if this doesn't work, restart R and try again
transcripts <- subset(gff, type == "transcript") #keep only transcripts , not CDS or exons
transcript_lengths <- width(transcripts) #isolate length of each gene
seqnames<-transcripts$ID #extract list of gene id
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)] #Add in length to geneInfo
Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in Annotation.GO.ID column
geneInfo$Annotation.GO.ID <- gsub(",", ";", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub('"', "", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub("-", NA, geneInfo$Annotation.GO.ID)
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names of the 840 significant DEGs by Origin
ID.vector <- geneInfo %>%
filter(q_Origin < 0.05) %>%
pull(gene_id)
##Get a list of GO Terms for the all 9011 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, Annotation.GO.ID)
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms <- split2
dim(GO.terms)
## [1] 705483 2
##Construct list of genes with 1 for genes in that are significant for Origin and 0 for those that are not
gene.vector = as.integer(ALL.vector %in% ID.vector) #since we ordered geneInfo by q_Origin, this puts a "1" for the first 840 genes, which are the same genes in ID.vector
names(gene.vector) <- ALL.vector #set names
#weight gene vector by bias for length of gene
pwf <- nullp(gene.vector, ID.vector, bias.data = LENGTH.vector)
#how many go terms from the 840 genes
##Get a list of GO Terms for the 840 DE genes
GO.terms_DE <- geneInfo %>% filter(q_Origin < 0.05) %>%
dplyr::select(gene_id, Annotation.GO.ID)
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_DE$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms_DE$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms_DE <- split2
dim(GO.terms_DE)
## [1] 46011 2
#run goseq using Wallenius method for all categories of GO terms
GO.wall <- goseq(pwf, ID.vector, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Write file of results
write.csv(GO, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin.csv")
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Filtering for p < 0.05
GO_001 <- GO %>%
dplyr::filter(over_represented_pvalue<0.001) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
dim(GO_05)
## [1] 219 7
head(GO_05)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0086036 0.0004981949 0.9999834 4
## 2 GO:0045939 0.0013830178 0.9998741 5
## 3 GO:0009151 0.0018919061 0.9999601 3
## 4 GO:0009215 0.0018919061 0.9999601 3
## 5 GO:0032963 0.0019057769 0.9998764 4
## 6 GO:0070268 0.0019565929 0.9998722 4
## numInCat term ontology
## 1 6 regulation of cardiac muscle cell membrane potential BP
## 2 11 negative regulation of steroid metabolic process BP
## 3 4 purine deoxyribonucleotide metabolic process BP
## 4 4 purine deoxyribonucleoside triphosphate metabolic process BP
## 5 8 collagen metabolic process BP
## 6 8 cornification BP
dim(GO_001)
## [1] 2 7
GO_001
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0086036 0.0004981949 0.9999834 4
## 2 GO:0051187 0.0001070349 0.9999878 8
## numInCat term ontology
## 1 6 regulation of cardiac muscle cell membrane potential BP
## 2 19 <NA> <NA>
Plotting!
ontologies<-c("BP", "CC", "MF")
for (category in ontologies) {
go_results <- GO_05
go_results<-go_results%>%
filter(ontology==category)%>%
filter(over_represented_pvalue != "NA") %>%
filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont=category,
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results<-go_results%>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot <- ggplot2::ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
print(GO.plot)
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_P05", "_", category, ".png"), plot=GO.plot, dpi=300, height=10, units="in", limitsize=FALSE)
}
##
## preparing gene to GO mapping data...
## preparing IC data...
## Saving 7 x 10 in image
## preparing gene to GO mapping data...
##
## preparing IC data...
## Saving 7 x 10 in image
## preparing gene to GO mapping data...
##
## preparing IC data...
## Saving 7 x 10 in image
GO_plot_all <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, numDEInCat)) %>%
mutate(term = fct_reorder(term, ontology)) %>%
ggplot( aes(x=term, y=numDEInCat) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
geom_point(size=1, aes(colour = ontology)) +
coord_flip() +
ylim(0,305) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="bottom"
) +
xlab("") +
ylab("") +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background #set title attributes
GO_plot_all
ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P001.pdf", GO_plot_all)
## Saving 7 x 5 in image
#ordered by p-value
GO_plot_all_pval <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, over_represented_pvalue)) %>%
mutate(term = fct_reorder(term, ontology)) %>%
ggplot( aes(x=term, y=numDEInCat) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
geom_point(size=1, aes(colour = ontology)) +
coord_flip() +
ylim(0,305) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="bottom"
) +
xlab("") +
ylab("") +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background #set title attributes
GO_plot_all_pval
ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P001_orderedpval.pdf", GO_plot_all_pval)
## Saving 7 x 5 in image
GO_plot_all <- GO_05 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, numDEInCat)) %>%
mutate(term = fct_reorder(term, ontology)) %>%
ggplot( aes(x=term, y=numDEInCat) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
geom_point(size=1, aes(colour = ontology)) +
coord_flip() +
ylim(0,305) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="bottom"
) +
xlab("") +
ylab("") +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background #set title attributes
GO_plot_all
ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P05.pdf", GO_plot_all, width = 12, height = 24, units = c("in"), dpi=300, limitsize=FALSE)
#ordered by p-value
GO_plot_all_pval <- GO_05 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, over_represented_pvalue)) %>%
mutate(term = fct_reorder(term, ontology)) %>%
ggplot( aes(x=term, y=numDEInCat) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
geom_point(size=1, aes(colour = ontology)) +
coord_flip() +
ylim(0,305) +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="bottom"
) +
xlab("") +
ylab("") +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background #set title attributes
GO_plot_all_pval
ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P05_orderedpval.pdf", GO_plot_all_pval, width = 12, height = 24, units = c("in"), dpi=300, limitsize=FALSE)
##Get a list of KEGG Terms for all 9011 genes
KO.terms <- geneInfo %>%
dplyr::select(gene_id, KEGG)
#run goseq using Wallenius method for all categories of KEGG terms
KO.wall <-
goseq(
pwf,
ID.vector,
gene2cat = KO.terms,
test.cats = c("KEGG"),
method = "Wallenius",
use_genes_without_cat = TRUE
)
## Using manually entered categories.
## Calculating the p-values...
KO <- KO.wall[order(KO.wall$over_represented_pvalue), ]
colnames(KO)[1] <- "KEGGterm"
#Filtering for p < 0.05
KO_05 <- KO %>%
dplyr::filter(over_represented_pvalue < 0.05) %>%
dplyr::arrange(., over_represented_pvalue)
head(KO_05)
## KEGGterm over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1 K01052 0.002747631 0.9999336 3 4
## 2 K01539 0.005115479 1.0000000 2 2
## 3 K05658 0.005129921 1.0000000 2 2
## 4 K05619 0.005222261 1.0000000 2 2
## 5 K11789 0.006211963 0.9996514 3 6
## 6 K01363 0.006678092 1.0000000 2 2
dim(KO_05)
## [1] 21 5
#Write file of results
write.csv(KO, "../../../output/Slope_Base/GOSeq/GOseq_KEGG_Origin.csv")
Over-represented KEGG terms, p = 0.05: “K01052” “K01539” “K05658” “K05619” “K11789” “K01363” “K13348” “K01345” “K24436” “K17592” “K25493” “K24125” “K12825” “K08059” “K05929” “K19721” “K14480” “K03671” “K06711” “K13443” “K18405”
### Generate vector with names of the 346 significant DEGs by Origin that are upregulated in the Slope origin in both treatments
ID.vector_upSlope <- geneInfo %>%
filter(q_Origin < 0.05) %>%
filter(Stable_OriginFC > 0 & Variable_OriginFC > 0) %>%
pull(gene_id)
##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Slope and 0 for those that are not
gene.vector_upSlope = as.integer(ALL.vector %in% ID.vector_upSlope)
names(gene.vector_upSlope) <- ALL.vector #set names
#weight gene vector by bias for length of gene
pwf_upSlope <- nullp(gene.vector_upSlope, ID.vector_upSlope, bias.data = LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall_upSlope <- goseq(pwf_upSlope, ID.vector_upSlope, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_upSlope <- GO.wall_upSlope[order(GO.wall_upSlope$over_represented_pvalue),]
colnames(GO_upSlope)[1] <- "GOterm"
#Write file of results
write.csv(GO_upSlope, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_upSlope.csv")
#Filtering for p < 0.05
GO_upSlope_05 <- GO_upSlope %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Filtering for p < 0.05
GO_upSlope_001 <- GO_upSlope %>%
dplyr::filter(over_represented_pvalue<0.001) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
dim(GO_upSlope_05)
## [1] 422 7
dim(GO_upSlope_001)
## [1] 23 7
There are 422 over-represented GO terms (by p <0.05) in the 346 significant DEGs by Origin that are upregulated in the Slope origin in both treatments
Plotting!
ontologies<-c("BP", "CC", "MF")
for (category in ontologies) {
go_results <- GO_upSlope_05
go_results<-go_results%>%
filter(ontology==category)%>%
filter(over_represented_pvalue != "NA") %>%
filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont=category,
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results<-go_results%>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_upslope <- ggplot2::ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
print(GO.plot_upslope)
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_upslope_P05", "_", category, ".png"), plot=GO.plot_upslope, dpi=100, width=12, height=48, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
### Generate vector with names of the 481 significant DEGs by Origin that are upregulated in the Flat origin in both treatments
ID.vector_upFlat <- geneInfo %>%
filter(q_Origin < 0.05) %>%
filter(Stable_OriginFC < 0 & Variable_OriginFC < 0) %>%
pull(gene_id)
##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Flat and 0 for those that are not
gene.vector_upFlat = as.integer(ALL.vector %in% ID.vector_upFlat)
names(gene.vector_upFlat) <- ALL.vector #set names
#weight gene vector by bias for length of gene
pwf_upFlat <- nullp(gene.vector_upFlat, ID.vector_upFlat, bias.data = LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall_upFlat <- goseq(pwf_upFlat, ID.vector_upFlat, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_upFlat <- GO.wall_upFlat[order(GO.wall_upFlat$over_represented_pvalue),]
colnames(GO_upFlat)[1] <- "GOterm"
#Write file of results
write.csv(GO_upFlat, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_upFlat.csv")
#Filtering for p < 0.05
GO_upFlat_05 <- GO_upFlat %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Filtering for p < 0.05
GO_upFlat_001 <- GO_upFlat %>%
dplyr::filter(over_represented_pvalue<0.001) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
dim(GO_upFlat_05)
## [1] 243 7
dim(GO_upFlat_001)
## [1] 0 7
There are 243 over-represented GO terms (by p <0.05) in the 481 significant DEGs by Origin that are upregulated in the Flat origin in both treatments
Plotting!
ontologies<-c("BP", "CC", "MF")
for (category in ontologies) {
go_results <- GO_upFlat_05
go_results<-go_results%>%
filter(ontology==category)%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont=category,
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results<-go_results%>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_upFlat <- ggplot2::ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
print(GO.plot_upFlat)
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_upFlat_P05", "_", category, ".png"), plot=GO.plot_upFlat, dpi=100, width=12, height=12, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
Stable_OriginFC > 0 & Variable_OriginFC < 0
### Generate vector with names of the 11 significant DEGs by Origin that are upregulated in the Slope origin in only the stable treatment
ID.vector_Slope_upStable_downVariable <- geneInfo %>%
filter(q_Origin < 0.05) %>%
filter(Stable_OriginFC > 0 & Variable_OriginFC < 0) %>%
pull(gene_id)
##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Slope and 0 for those that are not
gene.vector_updownSlope = as.integer(ALL.vector %in% ID.vector_Slope_upStable_downVariable)
names(gene.vector_updownSlope) <- ALL.vector #set names
#weight gene vector by bias for length of gene
pwf_updownSlope <- nullp(gene.vector_updownSlope, ID.vector_Slope_upStable_downVariable, bias.data = LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall_updownSlope <- goseq(pwf_updownSlope, ID.vector_Slope_upStable_downVariable, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_updownSlope <- GO.wall_updownSlope[order(GO.wall_updownSlope$over_represented_pvalue),]
colnames(GO_updownSlope)[1] <- "GOterm"
#Write file of results
write.csv(GO_updownSlope, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_Slope_upStable_downVariable.csv")
#Filtering for p < 0.05
GO_updownSlope_05 <- GO_updownSlope %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Filtering for p < 0.001
GO_updownSlope_001 <- GO_updownSlope %>%
dplyr::filter(over_represented_pvalue<0.001) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
dim(GO_updownSlope_05)
## [1] 151 7
dim(GO_updownSlope_001)
## [1] 0 7
There are 151 over-represented GO terms (by p <0.05) in the 11 significant DEGs by Origin that are upregulated in the Slope origin in Stable treatment and downregulated in the variable treatment
Plotting!
ontologies<-c("BP", "CC", "MF")
for (category in ontologies) {
go_results <- GO_updownSlope_05
go_results<-go_results%>%
filter(ontology==category)%>%
filter(over_represented_pvalue != "NA") %>%
filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont=category,
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results<-go_results%>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_updownSlope <- ggplot2::ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
print(GO.plot_updownSlope)
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_updownSlope_P05", "_", category, ".png"), plot=GO.plot_updownSlope, dpi=100, width=12, height=24, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...
Stable_OriginFC < 0 & Variable_OriginFC > 0
2 genes: one is unannotated (“Pocillopora_acuta_HIv2___RNAseq.g5107.t1a”) and one is:
Pocillopora_acuta_HIv2___RNAseq.g7761.t1 6087.XP_002155939.2 Belongs to the DHHC palmitoyltransferase family
### Generate vector with names of the 2 significant DEGs by Origin that are upregulated in the Slope origin in both treatments
ID.vector_Slope_downStable_upVariable <- geneInfo %>%
filter(q_Origin < 0.05) %>%
filter(Stable_OriginFC < 0 & Variable_OriginFC > 0) %>%
pull(gene_id)
##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Slope and 0 for those that are not
gene.vector_downupSlope = as.integer(ALL.vector %in% ID.vector_Slope_downStable_upVariable)
names(gene.vector_downupSlope) <- ALL.vector #set names
#weight gene vector by bias for length of gene
pwf_downupSlope <- nullp(gene.vector_downupSlope, ID.vector_Slope_downStable_upVariable, bias.data = LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall_downupSlope <- goseq(pwf_downupSlope, ID.vector_downupSlope, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_downupSlope <- GO.wall_downupSlope[order(GO.wall_downupSlope$over_represented_pvalue),]
colnames(GO_downupSlope)[1] <- "GOterm"
#Write file of results
write.csv(GO_downupSlope, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_Slope_downStable_upVariable.csv")
#Filtering for p < 0.05
GO_downupSlope_05 <- GO_downupSlope %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Filtering for p < 0.001
GO_downupSlope_001 <- GO_downupSlope %>%
dplyr::filter(over_represented_pvalue<0.001) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
dim(GO_downupSlope_05)
## [1] 11 7
dim(GO_downupSlope_001)
## [1] 0 7
There are 11 over-represented GO terms (by p <0.05) in the 2 significant DEGs by Origin that are downregulated in the Slope origin in Stable treatment and upregulated in the variable treatment
Plotting!
ontologies<-c("BP", "MF")
for (category in ontologies) {
go_results <- GO_downupSlope_05
go_results<-go_results%>%
filter(ontology==category)%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont=category,
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results<-go_results%>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_downupSlope <- ggplot2::ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
print(GO.plot_downupSlope)
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_downupSlope_P05", "_", category, ".png"), plot=GO.plot_downupSlope)
}
## preparing gene to GO mapping data...
## preparing IC data...
## Saving 7 x 5 in image
## preparing gene to GO mapping data...
##
## preparing IC data...
## Saving 7 x 5 in image